Lag analysis at a state-level

Use most correlated shift time to build linear models

\[y = \beta_{0} + \beta_{1}(\sum I(A_i = 1)) + \beta_{2}x_1 + \beta_{3}x_2\]

# select best lag number based on intial exploration
case.shifted.days.spearman <- 37 # based spearman correlation
doc.visit.shifted.days.spearman <- 62 # based spearman correlation for doctor visit
cum.death.shifted.days.spearman <- 55
death.shifted.days.spearman <- 55

case.vec <- c("confirmed_7dav_incidence_num", "confirmed_7dav_cumulative", "confirmed_7dav_cumulative_prop")
doc.vec <- c("smoothed_cli", "smoothed_adj_cli")
cum.death.vec <- c("deaths_7dav_cumulative_num")
death.vec <- c("deaths_7dav_incidence_num")

# Make two copies for shifting the data
selected.df <- intervention_mobility_case
factored_data <- intervention_mobility_case
# Change the data by shifting the covariates
# Shift the case count column vector by the specified shift time
factored_data <- shiftDays(selected.df, factored_data, case.shifted.days.spearman, case.vec)

# Shift the  doctor column vector by the specified shift time
factored_data <-shiftDays(selected.df, factored_data, doc.visit.shifted.days.spearman, doc.vec)
  
# Shift the death count column vector by the specified shift time
factored_data <-shiftDays(selected.df, factored_data, cum.death.shifted.days.spearman, cum.death.vec)

# Shift the cum death count column vector by the specified shift time
factored_data <-shiftDays(selected.df, factored_data, death.shifted.days.spearman, death.vec)
  
# We specifically look at emergency declaration
factored_data%>% 
  mutate(EmergDec.duration = cumsum(EmergDec)) %$% 
  lm(full_time_work_prop ~ EmergDec.duration + smoothed_cli+smoothed_adj_cli ) %>% 
  summary()
## 
## Call:
## lm(formula = full_time_work_prop ~ EmergDec.duration + smoothed_cli + 
##     smoothed_adj_cli)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0169129 -0.0056246  0.0007641  0.0052080  0.0175204 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.500e-02  1.552e-03  22.550  < 2e-16 ***
## EmergDec.duration  7.469e-05  2.180e-05   3.427 0.000745 ***
## smoothed_cli      -5.172e-03  8.449e-04  -6.121 5.00e-09 ***
## smoothed_adj_cli   5.534e-03  1.076e-03   5.144 6.54e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007475 on 195 degrees of freedom
##   (458 observations deleted due to missingness)
## Multiple R-squared:  0.2834, Adjusted R-squared:  0.2723 
## F-statistic:  25.7 on 3 and 195 DF,  p-value: 4.702e-14
# We specifically look at emergency declaration
factored_data%>% 
  mutate(EmergDec.duration = cumsum(EmergDec)) %$% 
  lm(full_time_work_prop ~ EmergDec.duration + smoothed_cli+smoothed_adj_cli+confirmed_7dav_cumulative_prop+deaths_7dav_cumulative_num) %>% 
  summary()
## 
## Call:
## lm(formula = full_time_work_prop ~ EmergDec.duration + smoothed_cli + 
##     smoothed_adj_cli + confirmed_7dav_cumulative_prop + deaths_7dav_cumulative_num)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0172255 -0.0058456  0.0008801  0.0050006  0.0178635 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.906e-02  4.804e-03   8.131 6.53e-14 ***
## EmergDec.duration               1.498e-05  7.268e-05   0.206    0.837    
## smoothed_cli                   -5.218e-03  8.671e-04  -6.017 9.61e-09 ***
## smoothed_adj_cli                5.941e-03  1.209e-03   4.914 1.99e-06 ***
## confirmed_7dav_cumulative_prop -3.348e-06  6.209e-06  -0.539    0.590    
## deaths_7dav_cumulative_num      1.238e-06  1.658e-06   0.746    0.456    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007589 on 181 degrees of freedom
##   (470 observations deleted due to missingness)
## Multiple R-squared:  0.261,  Adjusted R-squared:  0.2405 
## F-statistic: 12.78 on 5 and 181 DF,  p-value: 1.201e-10

Model with shifting the covariates (weekends included)

Model without shifting the covariates (weekends included)

intervention.lm <- intervention_mobility_case %>% 
  mutate(EmergDec.duration = cumsum(EmergDec)) 

lm.fit.no.lag <- lm(full_time_work_prop ~ EmergDec.duration + smoothed_cli+smoothed_adj_cli, data =intervention.lm
) 

intervention.lm$predlm <- c(rep(NA, nrow(intervention.lm) - length(predict(lm.fit.no.lag))), predict(lm.fit.no.lag))

intervention.lm%>% 
  mutate(policy.duration = cumsum(EmergDec), EmergDeclaration = as.factor(EmergDec)) %>% 
  ggplot(aes(x = time_value, y = full_time_work_prop, color = EmergDeclaration)) +
  geom_point() + 
  geom_line(aes(x = time_value, y = predlm, colour="fitted value"), size = 1)+
  labs(title = "Covariates selected WITHOUT most correlated number of shift")

Weekend effects

We suspect that the mobility signal is lower than usual during the weekend.

intervention_mobility_case$weekday <- weekdays(as.Date(intervention_mobility_case$time_value)) 

p <- ggplot(intervention_mobility_case, aes(x=weekday, y=full_time_work_prop)) + 
  geom_boxplot()
p

Re-plot the regression line after dropping weekends

Covariates shifted

## 
## Call:
## lm(formula = full_time_work_prop ~ EmergDec.duration + smoothed_cli + 
##     smoothed_adj_cli)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.020355 -0.002754 -0.000059  0.002894  0.012624 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.876e-02  1.166e-03  33.233  < 2e-16 ***
## EmergDec.duration  6.992e-05  1.644e-05   4.254 3.86e-05 ***
## smoothed_cli       8.954e-04  7.488e-04   1.196    0.234    
## smoothed_adj_cli  -8.490e-04  8.843e-04  -0.960    0.339    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004686 on 137 degrees of freedom
##   (328 observations deleted due to missingness)
## Multiple R-squared:  0.4619, Adjusted R-squared:  0.4502 
## F-statistic: 39.21 on 3 and 137 DF,  p-value: < 2.2e-16

Covariates not shifted

## 
## Call:
## lm(formula = full_time_work_prop ~ policy.duration + smoothed_cli + 
##     smoothed_adj_cli)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.030772 -0.006223 -0.000380  0.005593  0.024096 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.594e-02  1.289e-03  43.408  < 2e-16 ***
## policy.duration   4.259e-05  1.433e-05   2.973 0.003353 ** 
## smoothed_cli      2.032e-03  1.033e-03   1.966 0.050791 .  
## smoothed_adj_cli -5.075e-03  1.322e-03  -3.839 0.000171 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008987 on 181 degrees of freedom
##   (284 observations deleted due to missingness)
## Multiple R-squared:  0.1665, Adjusted R-squared:  0.1527 
## F-statistic: 12.05 on 3 and 181 DF,  p-value: 3.128e-07

What will happen if we add more covidcast signals as covariates?

Adding covidcast signals

## 
## Call:
## lm(formula = full_time_work_prop ~ EmergDec.duration + smoothed_cli + 
##     smoothed_adj_cli + confirmed_7dav_incidence_num + confirmed_7dav_cumulative + 
##     confirmed_7dav_cumulative_prop + deaths_7dav_incidence_num + 
##     deaths_7dav_cumulative_num)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0194675 -0.0028639  0.0000091  0.0027506  0.0122356 
## 
## Coefficients: (1 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.617e-02  4.227e-03  10.923   <2e-16 ***
## EmergDec.duration              -5.888e-05  6.727e-05  -0.875   0.3831    
## smoothed_cli                    8.325e-04  7.602e-04   1.095   0.2755    
## smoothed_adj_cli               -1.394e-03  1.009e-03  -1.382   0.1695    
## confirmed_7dav_incidence_num    8.297e-07  3.475e-07   2.387   0.0185 *  
## confirmed_7dav_cumulative      -8.415e-09  1.170e-08  -0.719   0.4732    
## confirmed_7dav_cumulative_prop         NA         NA      NA       NA    
## deaths_7dav_incidence_num       8.577e-05  3.421e-05   2.507   0.0135 *  
## deaths_7dav_cumulative_num      1.314e-06  1.277e-06   1.029   0.3055    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004702 on 125 degrees of freedom
##   (336 observations deleted due to missingness)
## Multiple R-squared:  0.4483, Adjusted R-squared:  0.4174 
## F-statistic: 14.51 on 7 and 125 DF,  p-value: 1.014e-13

Adding other policy duration

# Try to add other intervention covariates

factored_data.without.weekend %$%  
  lm(full_time_work_prop ~EmergDec.duration +StayAtHomeDuration+PublicMaskDuration+SchoolCloseDuration+GathRestrictDuration+BarRestrictDuration+NEBusinessCloseDuration+ RestaurantRestrictDuration+  smoothed_cli+smoothed_adj_cli+confirmed_7dav_incidence_num+confirmed_7dav_cumulative+confirmed_7dav_cumulative_prop+deaths_7dav_incidence_num+deaths_7dav_cumulative_num+ SchoolCloseDuration) %>% 
  summary()
## 
## Call:
## lm(formula = full_time_work_prop ~ EmergDec.duration + StayAtHomeDuration + 
##     PublicMaskDuration + SchoolCloseDuration + GathRestrictDuration + 
##     BarRestrictDuration + NEBusinessCloseDuration + RestaurantRestrictDuration + 
##     smoothed_cli + smoothed_adj_cli + confirmed_7dav_incidence_num + 
##     confirmed_7dav_cumulative + confirmed_7dav_cumulative_prop + 
##     deaths_7dav_incidence_num + deaths_7dav_cumulative_num + 
##     SchoolCloseDuration)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0194675 -0.0028639  0.0000091  0.0027506  0.0122356 
## 
## Coefficients: (8 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.617e-02  4.227e-03  10.923   <2e-16 ***
## EmergDec.duration              -5.888e-05  6.727e-05  -0.875   0.3831    
## StayAtHomeDuration                     NA         NA      NA       NA    
## PublicMaskDuration                     NA         NA      NA       NA    
## SchoolCloseDuration                    NA         NA      NA       NA    
## GathRestrictDuration                   NA         NA      NA       NA    
## BarRestrictDuration                    NA         NA      NA       NA    
## NEBusinessCloseDuration                NA         NA      NA       NA    
## RestaurantRestrictDuration             NA         NA      NA       NA    
## smoothed_cli                    8.325e-04  7.602e-04   1.095   0.2755    
## smoothed_adj_cli               -1.394e-03  1.009e-03  -1.382   0.1695    
## confirmed_7dav_incidence_num    8.297e-07  3.475e-07   2.387   0.0185 *  
## confirmed_7dav_cumulative      -8.415e-09  1.170e-08  -0.719   0.4732    
## confirmed_7dav_cumulative_prop         NA         NA      NA       NA    
## deaths_7dav_incidence_num       8.577e-05  3.421e-05   2.507   0.0135 *  
## deaths_7dav_cumulative_num      1.314e-06  1.277e-06   1.029   0.3055    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004702 on 125 degrees of freedom
##   (336 observations deleted due to missingness)
## Multiple R-squared:  0.4483, Adjusted R-squared:  0.4174 
## F-statistic: 14.51 on 7 and 125 DF,  p-value: 1.014e-13
# Predict the mobility
new.pred <- factored_data.without.weekend %$%  
  lm(full_time_work_prop ~EmergDec.duration +StayAtHomeDuration+PublicMaskDuration+SchoolCloseDuration+GathRestrictDuration+BarRestrictDuration+NEBusinessCloseDuration+ RestaurantRestrictDuration+  smoothed_cli+smoothed_adj_cli+confirmed_7dav_incidence_num+confirmed_7dav_cumulative+confirmed_7dav_cumulative_prop+deaths_7dav_incidence_num+deaths_7dav_cumulative_num+ SchoolCloseDuration)%>%
  predict()

# Pad the fitted values with NA 
factored_data.without.weekend$predlm <- c(rep(NA, nrow(factored_data.without.weekend) - length(new.pred)), new.pred)

# Plot the graph
factored_data.without.weekend %>%
  ggplot(aes(x = time_value, y = full_time_work_prop, color = EmergDeclaration)) +
  geom_point() + 
  geom_line(aes(y = predlm, colour="fitted value"), size = 1) +
   labs(title = "All covariates selected WITH most correlated number of shift (weekends dropped)")